* Start from an empty dataset.
clear

* NOTE: the globals pathdata_baseline and pathdata_summary used throughout this
* script are not defined in this file; they must be set (e.g. by a master
* do-file) before this script is run.

* Fix the random-number seed so that the bonus draws are reproducible.
set seed 1073741823

* Install the user-written -randomselect- command from SSC. -capture- swallows
* any error raised by the installation attempt so the script keeps running.
capture ssc install randomselect


* ########## WAVE 1 ##########


/* First session: import the raw wave-1 export */
insheet using "${pathdata_baseline}/StatisticsQualitativeContent_raw_wave1.csv", names
* The first two data rows are removed before the numeric conversion
* (presumably extra header/metadata rows of the export -- confirm against the raw file).
drop in 1/2

* Remove rows whose participant ID is the unfilled placeholder or is empty.
drop if prolific_pid == "{{%PROLIFIC_PID%}}"
drop if prolific_pid == ""

* Convert every column that contains only numbers to a numeric variable.
destring *, replace

	* Sample restriction on the writedowninfo answer (only code 2 is kept).
	keep if writedowninfo==2

	* One uniform draw per participant; it determines bonus_belief below.
	* NOTE: the draws depend on the seed, the row order and the set of rows in
	* memory, so the statements above and the order of the random commands in
	* this section must not be rearranged if results are to be reproduced.
	gen random_draw = runiform()
	* -summarize- rather than -tab-: random_draw is continuous, so a one-way
	* table would list one row per participant and abort with r(134) once the
	* number of distinct values exceeds the tabulate row limit.
	summarize random_draw

	// ONLY NEEDED FOR BONUS PAYMENTS
	// Draw the participants selected for a bonus (proportion 0.1); the code
	// below treats sample == 1 as "selected".
	randomselect, gen(sample) prop(0.1)
	tab sample

	// Randomly draw which answer is paid (30$) for selected participants:
	// 1 and 2 denote the stated beliefs, 3 the structured recall question
	// typeofinfo_recall and 4 valence_recall.
	// Resulting shares: 1 -> 50%, 2 -> 25%, 3 -> 12.5%, 4 -> 12.5%.
	gen bonus_belief = .
	replace bonus_belief = 1 if sample == 1 & random_draw <= 0.5
	replace bonus_belief = 2 if sample == 1 & random_draw > 0.5 & random_draw <= 0.75
	replace bonus_belief = 3 if sample == 1 & random_draw > 0.75 & random_draw <= 0.875
	replace bonus_belief = 4 if sample == 1 & random_draw > 0.875

	// Randomly draw the product for which the bonus payment (30$) is carried out.
	// Two-step draw: about one third get bicycle, then half of the remaining
	// selected participants get restaurant; everyone left gets videogame.
	randomselect if sample == 1, gen(bicycle) prop(0.333)
	randomselect if sample == 1 & bicycle == 0, gen(restaurant) prop(0.5)

	// bonusproduct holds the product name as a string; it stays empty for
	// participants who were not selected for a bonus.
	gen bonusproduct = ""
	replace bonusproduct = "videogame" if sample == 1 & bicycle ==0 & restaurant==0
	replace bonusproduct = "bicycle" if sample == 1 & bicycle ==1
	replace bonusproduct = "restaurant" if sample == 1 & restaurant ==1
	tab bonusproduct

	/* Restrict the wave-1 data to the variables used later on */
	keep condition_* p_guess* task* wtp_* prolific_pid ///
		positive_stat negative_stat tot_reviews* story_type* truepos_fraction* ///
		sample bonus_belief bonusproduct ///
		qid9 qid124 qid12 qid18 ///
		durationinseconds finished ///
		comprehension1 comprehension2 comprehension3 comprehension5 ///
		screenedout writedowninfo time_restaurant v44 _q230_pagesubmit

	* Give the demographic questions readable names.
	rename qid9   gender
	rename qid124 age
	rename qid12  education
	rename qid18  employment

	* Time spent on the decision page, named with the pagetime_ stub.
	* Only videogame and bicycle are renamed here; the restaurant rename
	* (rename v39 pagetime_restaurant) is disabled in this script.
	rename _q230_pagesubmit pagetime_videogame
	rename v44              pagetime_bicycle

	* Male indicator: 1 for gender code 1, 0 for codes 2 and 3, missing otherwise.
	gen male = cond(gender == 1, 1, cond(inlist(gender, 2, 3), 0, .))

	* Bachelor's degree or more: 1 for education codes 5-8, 0 for codes 1-4,
	* missing otherwise.
	gen college = cond(inlist(education, 5, 6, 7, 8), 1, cond(inlist(education, 1, 2, 3, 4), 0, .))

	* Employment status: 1 for employment codes 1-2, 0 for codes 3-4,
	* missing otherwise.
	gen employed = cond(inlist(employment, 1, 2), 1, cond(inlist(employment, 3, 4), 0, .))

	* Finished wave 1: employment is the last mandatory question of the survey,
	* so any valid answer to it (codes 1-4) marks a completed session.
	gen completed_wave1 = inlist(employment, 1, 2, 3, 4)

	* Keep a single row per participant; -force- is required because the
	* dropped rows may differ in variables other than prolific_pid.
	duplicates drop prolific_pid, force
		
		

		/* Wide to long: one row per participant and product suffix */
		reshape long condition_ p_guess_ wtp_ task_ truepos_fraction_ pagetime_, i(prolific_pid) j(product) string


		* Compute bayesian posterior
		* Total number of reviews per product; -9999 marks any other product value.
		gen total_reviews = -9999
		replace total_reviews = 14 if product=="videogame"
		replace total_reviews = 19 if product=="restaurant"
		replace total_reviews = 17 if product=="bicycle"

		* Size of the review sample shown; only filled for the n>1 conditions.
		gen sample_reviews = tot_reviews_pos if inlist(condition_, "n>1_nocontext_pos", "n>1_context_pos")
		replace sample_reviews = tot_reviews_neg if inlist(condition_, "n>1_nocontext_neg", "n>1_context_neg")

		* Count taken from positive_stat (pos conditions) or negative_stat
		* (neg conditions); only filled for the n>1 conditions.
		gen sample_positive_reviews = positive_stat if inlist(condition_, "n>1_nocontext_pos", "n>1_context_pos")
		replace sample_positive_reviews = negative_stat if inlist(condition_, "n>1_nocontext_neg", "n>1_context_neg")

		* Posterior = observed share k/N plus the share of unseen reviews
		* (N-n)/N weighted by (k+1)/(n+2), with k = sample_positive_reviews,
		* n = sample_reviews and N = total_reviews. Missing outside the n>1
		* conditions because k and n are missing there.
		gen bayesian_posterior = sample_positive_reviews/total_reviews + (total_reviews-sample_reviews) / total_reviews * (sample_positive_reviews+1)/(sample_reviews+2)

		* Treatment arm = condition with the _pos/_neg suffix removed.
		gen treatment = "noinfo"
		replace treatment = "n>1_context" if inlist(condition_, "n>1_context_pos", "n>1_context_neg")
		replace treatment = "n=1_context" if inlist(condition_, "n=1_context_pos", "n=1_context_neg")
		replace treatment = "n=1_nocontext" if inlist(condition_, "n=1_nocontext_pos", "n=1_nocontext_neg")
		replace treatment = "n>1_nocontext" if inlist(condition_, "n>1_nocontext_pos", "n>1_nocontext_neg")

		* WTP relative to a product-specific amount: 600 for bicycle, 30 for
		* restaurant and 50 for every other product value.
		gen excess_wtp = wtp_ - 50
		replace excess_wtp = wtp_ - 600 if product == "bicycle"
		replace excess_wtp = wtp_ - 30 if product == "restaurant"

		* net_wtp keeps the raw difference; excess_wtp has its sign reversed in
		* the negative (_neg) conditions below.
		gen net_wtp = excess_wtp

		replace excess_wtp = 50 - wtp_ if product == "videogame" & inlist(condition_, "n>1_context_neg", "n>1_nocontext_neg", "n=1_context_neg", "n=1_nocontext_neg")
		replace excess_wtp = 600 - wtp_ if product == "bicycle" & inlist(condition_, "n>1_context_neg", "n>1_nocontext_neg", "n=1_context_neg", "n=1_nocontext_neg")
		replace excess_wtp = 30 - wtp_ if product == "restaurant" & inlist(condition_, "n>1_context_neg", "n>1_nocontext_neg", "n=1_context_neg", "n=1_nocontext_neg")

		* Numeric, value-labelled versions of the string identifiers.
		encode condition_, gen(condition__)
		encode treatment, gen(treatment_numeric)
		tabulate treatment_numeric, nolabel
		tabulate treatment, nolabel

		* Distance of the posterior from 0.5.
		gen abs_belief_update = abs(bayesian_posterior - 0.5)

		* Diagnostics (log output only): condition means, one-way ANOVA, and
		* the same model replayed as a regression with base levels shown.
		* The ANOVA is run once; the original ran the identical command twice.
		mean abs_belief_update, over(condition__)

		anova abs_belief_update condition__
		reg, baselevels

* An unmatched comment terminator that used to precede this line was removed.
save "${pathdata_baseline}/temp/StatisticsQualitativeContent_long.dta", replace
	

* ########## WAVE 2 ##########

clear

/* Second (recall) session: import the raw wave-2 export */
insheet using "${pathdata_baseline}/StatisticsQualitativeContent_raw_wave2.csv", names
* The first two data rows are removed before the numeric conversion.
drop in 1/2

destring *, replace

	* The wave-2 duration gets its own name so that it cannot clash with the
	* wave-1 variable of the same name when the two waves are merged.
	rename durationinseconds durationinseconds2

	/* Restrict to the variables used later on */
	keep beliefs_* cu_* valence_recall_* typeofinfo_recall_* wtp_* ///
		prolific_pid screenedout v44 v50 v33 v32 _beliefs_first durationinseconds2

	* Rows without a participant ID cannot be matched to wave 1.
	drop if prolific_pid == ""

	* Participants that appear more than once are removed completely
	* (all of their rows, not only the repeated ones).
	bysort prolific_pid: gen n_copies = _N
	drop if n_copies > 1
	drop n_copies

	drop v32 _beliefs_first

	* Prefix the wave-2 WTP answers with wtp_recall_ so that they stay distinct
	* from the wave-1 wtp_ variables.
	foreach item in bicycle videogame restaurant {
		rename wtp_`item' wtp_recall_`item'
	}

	/* Wide to long: one row per participant and product suffix */
	reshape long beliefs_ cu_ wtp_recall_ typeofinfo_recall_ valence_recall_, i(prolific_pid) j(product) string


save "${pathdata_baseline}/temp/StatisticsQualitativeContent_long_followup.dta", replace

* ########## MERGE WAVE 1 and WAVE 2 ##########

	/* The data in memory are the master; the wave-1 long file is the using
	   data. _merge: 1 = master only, 2 = using (wave 1) only, 3 = in both. */
	merge 1:1 prolific_pid product using "${pathdata_baseline}/temp/StatisticsQualitativeContent_long.dta"

* ########## CREATE DATA ON ATTRITION IN BETWEEN SUBJECT TREATMENTS ##########

	* Completion indicator: 1 when the row was matched across both files.
	gen completed = (_merge == 3)

	* Between-subject treatment indicators derived from story_type.
	gen mixed      = (story_type == "mixed")
	gen neutral    = (story_type == "neutral")
	gen consistent = (story_type == "consistent")

	* ##### Dataset holding only treatment indicators and completion status
preserve
	* Rows found only in the master data have no wave-1 record and are removed.
	drop if _merge == 1

	keep prolific_pid completed mixed neutral consistent

	* Collapse the participant x product rows to distinct combinations.
	duplicates drop

	save "${pathdata_summary}/ContextAttrition.dta", replace

restore

	* Continue with matched rows only; the helper variables are no longer needed.
	keep if _merge == 3
	drop _merge mixed neutral consistent

	destring *, replace

	/* Exclusion criteria */
	drop if screenedout == "True"
	* In the noinfo condition any guess other than 50 leads to exclusion of the row.
	drop if condition_ == "noinfo" & p_guess_ != 50

save "${pathdata_baseline}/temp/StatisticsQualitativeContent_merged.dta", replace





* ########## ADD VARIABLES TO MERGED DATA ##########


/* Variables for analysis */

	/* 1 - Effect variables */
	* Distance of the stated belief from 50, first in the "negative" direction.
	gen diff = 50 - p_guess_
	gen diff_recall = 50 - beliefs_

	* effect / effect_recall: the sign is reversed for the positive (_pos)
	* conditions, so the value is belief - 50 there and 50 - belief elsewhere.
	gen effect = diff
	replace effect = p_guess_ - 50 if inlist(condition_, "n>1_context_pos", "n>1_nocontext_pos", "n=1_context_pos", "n=1_nocontext_pos")

	gen effect_recall = diff_recall
	replace effect_recall = beliefs_ - 50 if inlist(condition_, "n>1_context_pos", "n>1_nocontext_pos", "n=1_context_pos", "n=1_nocontext_pos")

	* Recalled WTP relative to a product-specific amount: 600 for bicycle,
	* 30 for restaurant and 50 for every other product value.
	gen excess_wtp_recall = wtp_recall_ - 50
	replace excess_wtp_recall = wtp_recall_ - 600 if product == "bicycle"
	replace excess_wtp_recall = wtp_recall_ - 30 if product == "restaurant"

	* net_wtp_recall keeps the raw difference; excess_wtp_recall has its sign
	* reversed in the negative (_neg) conditions below.
	gen net_wtp_recall = excess_wtp_recall

	replace excess_wtp_recall = 50 - wtp_recall_ if product == "videogame" & inlist(condition_, "n>1_context_neg", "n>1_nocontext_neg", "n=1_context_neg", "n=1_nocontext_neg")
	replace excess_wtp_recall = 600 - wtp_recall_ if product == "bicycle" & inlist(condition_, "n>1_context_neg", "n>1_nocontext_neg", "n=1_context_neg", "n=1_nocontext_neg")
	replace excess_wtp_recall = 30 - wtp_recall_ if product == "restaurant" & inlist(condition_, "n>1_context_neg", "n>1_nocontext_neg", "n=1_context_neg", "n=1_nocontext_neg")

	/* Drop missing products */
	drop if p_guess_ == .

	* Snapshot of the data; the matching -restore- comes after the summary
	* statistics file has been saved further down.
	preserve

	drop if condition_ == "noinfo"

	* Disabled intermediate save:
	*   save "${pathdata_baseline}/working_data_1.dta", replace
	* A -stop- command that followed it was not recognized by Stata and is
	* disabled as well (Simon, 22/10/30).

	/* Drop observations where updating is in the wrong direction */
	* FIX: the list previously contained the malformed label "n>=_nocontext_neg",
	* so rows in condition n=1_nocontext_neg were never dropped here. All eight
	* information conditions are now covered.
	* NOTE(review): this changes the resulting analysis sample.
	drop if effect < 0 & inlist(condition_, "n>1_nocontext_neg", "n>1_nocontext_pos", "n>1_context_pos", "n>1_context_neg", "n=1_nocontext_neg", "n=1_nocontext_pos", "n=1_context_pos", "n=1_context_neg")
	* Participants left with a single row after the exclusions are removed.
	bysort prolific_pid : drop if _N==1

		
		
		
* ########## STRUCTURED RECALL ##############

	* correct_recall = 1 when both recall answers (type of information and
	* valence) match the condition the participant was assigned to.
	gen correct_recall = 0

	* Answer texts of the type-of-information question, stored exactly as they
	* appear in the data (including the HTML tags).
	local info_multi "Information on <strong>multiple </strong>reviews"
	local info_multi_context "Information on <strong>multiple </strong>reviews plus some <strong>anecdotal details</strong> about one reviewer and their experience with the product or venue"
	local info_single "Information on a <strong>single </strong>review"
	local info_single_context "Information on a <strong>single </strong>review plus some <strong>anecdotal details</strong> about the reviewer and their experience with the product or venue"

	foreach sign in neg pos {
		* Valence answer that corresponds to the condition suffix.
		if "`sign'" == "neg" {
			local answer "Negative"
		}
		else {
			local answer "Positive"
		}

		replace correct_recall = 1 if condition_ == "n>1_context_`sign'" & typeofinfo_recall_ == "`info_multi_context'" & valence_recall_ == "`answer'"
		replace correct_recall = 1 if condition_ == "n>1_nocontext_`sign'" & typeofinfo_recall_ == "`info_multi'" & valence_recall_ == "`answer'"
		replace correct_recall = 1 if condition_ == "n=1_nocontext_`sign'" & typeofinfo_recall_ == "`info_single'" & valence_recall_ == "`answer'"
		replace correct_recall = 1 if condition_ == "n=1_context_`sign'" & typeofinfo_recall_ == "`info_single_context'" & valence_recall_ == "`answer'"
	}

save "${pathdata_baseline}/StatisticsQualitativeContent.dta", replace



* ##### Save data for summary statistics

	* Keep the participant ID and the demographic variables, then collapse
	* the participant x product rows to distinct rows.
	local demographics male age college employed
	keep prolific_pid `demographics'

	duplicates drop

	save "${pathdata_summary}/ContextSummary.dta", replace


* Return to the data as they were at the matching -preserve-.
restore






